##%######################################################%##
#                                                          #
####                   GRaF functions                   ####
#                                                          #
##%######################################################%##


#Functions adapted from the package GRaF (https://github.com/goldingn/GRaF)
#to fit models usign a Gausian-Bayesian approach (Golding & Purse, 2016. doi:10.1111/2041-210X.12523)


#Fitting GRaF Models----

graf <-

  function (y, x, error = NULL, weights = NULL, prior = NULL, l = NULL, opt.l = FALSE,

            theta.prior.pars = c(log(10), 1), hessian = FALSE, opt.control = list(),

            verbose = FALSE, method = c('Laplace', 'EP')) {



    method <- match.arg(method)



    # optionally optimise graf (by recursively calling this function)

    if (opt.l) {



      # get all visible object as a list

      args <- capture.all()



      # get the expected objects

      expected_args <- names(formals(graf))



      # remove any unexpected arguments

      args <- args[names(args) %in% expected_args]



      # pass this to optimiser

      fit <- optimise.graf(args)



      # skip out of this function and return

      return (fit)



    }





    if (!is.data.frame(x)) stop ("x must be a dataframe")



    # convert any ints to numerics

    for(i in 1:ncol(x)) if (is.integer(x[, i])) x[, i] <- as.numeric(x[, i])



    obsx <- x

    k <- ncol(x)

    n <- length(y)



    if (is.null(weights)) {

      # if weights aren't provided

      weights <- rep(1, n)

    } else {

      # if they are, run some checks

      # throw an error if weights are specified with EP

      if (method == 'EP') {

        stop ('weights are not implemented for the EP algorithm (yet)')

      }

      # or if any are negative

      if (any(weights < 0)) {

        stop ('weights must be positive or zero')

      }

    }



    # find factors and convert them to numerics

    notfacs <- 1:k

    facs <- which(unlist(lapply(x, is.factor)))

    if (length(facs) > 0) notfacs <- notfacs[-facs]

    for (fac in facs) {

      x[, fac] <- as.numeric(x[, fac])

    }

    x <- as.matrix(x)



    # scale the matrix, retaining scaling

    scaling <- apply(as.matrix(x[, notfacs]), 2, function(x) c(mean(x), sd(x)))

    for (i in 1:length(notfacs)) {

      x[, notfacs[i]] <- (x[, notfacs[i]] - scaling[1, i]) / scaling[2, i]

    }



    # set up the default prior, if not specified

    exp.prev <- sum(weights[y == 1]) / sum(weights)

    if (is.null(prior))  mnfun <- function(x) rep(exp.prev, nrow(x))

    else mnfun <- prior



    # give an approximation to l, if not specified (or optimised)

    if (is.null(l)) {

      l <- rep(0.01, k)

      l[notfacs] <- apply(x[y == 1, notfacs, drop = FALSE], 2, sd) * 8

      l[l == 0] <- 1

    }



    # calculate mean (on unscaled data and probability scale)

    mn <- mnfun(obsx)



    # fit model

    if (method == 'Laplace') {

      # by Laplace approximation

      fit <- graf.fit.laplace(y = y, x = as.matrix(x), mn = mn, l = l, wt = weights, e = error, verbose = verbose)

    } else {

      # or using the expectation-propagation algorithm

      fit <- graf.fit.ep(y = y, x = as.matrix(x), mn = mn, l = l, wt = weights, e = error, verbose = FALSE)

    }



    fit$mnfun = mnfun

    fit$obsx <- obsx

    fit$facs <- facs

    fit$hessian <- hessian

    fit$scaling <- scaling

    fit$peak = obsx[which(fit$MAP == max(fit$MAP))[1], ]

    class(fit) <- "graf"

    fit

  }



capture.all <- function() {

  # capture all visible objects in the parent environment and pass to a list

  env <- parent.frame()

  object_names <- objects(env)

  objects <- lapply(object_names,

                    get,

                    envir = env)

  names(objects) <- object_names

  return (objects)

}

#Fit Laplace----
graf.fit.laplace <-

  function (y, x, mn, l, wt, e = NULL, tol  = 10 ^ -6, itmax = 50,

            verbose = FALSE) {



    if (is.vector(x)) x <- as.matrix(x)

    mn <- stats::qnorm(mn)

    n <- length(y)



    # create the covariance matrix

    K <- cov.SE(x1 = x, e1 = e, e2 = NULL, l = l)



    # an identity matrix for the calculations

    eye <- diag(n)



    # initialise

    a <- rep(0, n)

    f <- mn

    obj.old <- Inf

    obj <- -sum(wt * d0(f, y))

    it <- 0



    # start newton iterations

    while (obj.old - obj > tol & it < itmax) {

      it <- it + 1

      obj.old <- obj

      W <- -(wt * d2(f, y))

      rW <- sqrt(W)

      cf <- f - mn

      mat1 <- rW %*% t(rW) * K + eye

      L <- tryCatch(chol(mat1),

                    error = function(x) return(NULL))

      b <- W * cf + wt * d1(f, y)

      mat2 <- rW * (K %*% b)

      adiff <- b - rW * backsolve(L, forwardsolve(t(L), mat2)) - a

      dim(adiff) <- NULL



      # find optimum step size using Brent's method

      res <- stats::optimise(psiline, c(0, 2), adiff, a, as.matrix(K), y, d0, mn, wt)

      a <- a + res$minimum * adiff

      f <- K %*% a + mn

      obj <- psi(a, f, mn, y, d0, wt)



    }



    # recompute key components

    cf <- f - mn

    W <- -(wt * d2(f, y))

    rW <- sqrt(W)

    mat1 <- rW %*% t(rW) * K + eye

    L <- tryCatch(chol(mat1),

                  error = function(x) return(NULL))



    # return marginal negative log-likelihood

    mnll <- (a %*% cf)[1, 1] / 2 + sum(log(diag(L)) - (wt * d0(f, y)))



    # get partial gradients of the objective wrt l



    # gradient components

    W12 <- matrix(rep(rW, n), n)

    R <- W12 * backsolve(L, forwardsolve(t(L), diag(rW)))

    C <- forwardsolve(t(L), (W12 * K))



    # partial gradients of the kernel

    dK <- cov.SE.d1(x, e, l)



    # rate of change of likelihood w.r.t. the mode

    s2 <- (diag(K) - colSums(C ^ 2)) / 2 * d3(f, y)



    # vector to store gradients

    l_grads <- rep(NA, length(l))



    for (i in 1:length(l)) {



      grad <- sum(R * dK[[i]]) / 2

      grad <- grad - (t(a) %*% dK[[i]] %*% a) / 2

      b <- dK[[i]]%*% d1(f, y)

      grad <- grad - t(s2) %*% (b - K %*% (R %*% b))

      l_grads[i] <- -grad



    }



    if(verbose ) cat(paste("  ", it, "Laplace iterations\n"))

    if(it == itmax) print("timed out, don't trust the inference!")

    return(list(y = y, x = x, MAP = f, ls = l, a = a, W = W, L = L, K = K,

                e = e, obsx = x, obsy = y, mnll = mnll, wt = wt,

                l_grads = l_grads))

  }

#Fit EP----
graf.fit.ep <-

  function(y, x, mn, l, wt, e = NULL, tol = 1e-6, itmax = 50, itmin = 2,

           verbose = FALSE) {

    # fit a GRaF model using expectation-propagation

    # as implemented in the gpml matlab library

    # If parallel  = TRUE, the EP uses the parallel update

    # as implemented in GPStuff



    # whether to use parallel EP (rather than sequential)

    parallel = FALSE



    if (is.vector(x)) {

      x <- as.matrix(x)

    }



    # mn to probability scale

    mn <- stats::qnorm(mn)

    n <- length(y)

    # covariance matrix

    K <- cov.SE(x1 = x, e1 = e, e2 = NULL, l = l)

    # identity matrix

    eye <- diag(n)



    # convert observations to +1, -1, save 0, 1 version

    oldy <- y

    y <- y * 2 - 1



    # initialise



    # \tilde{\mathbf{\nu}} = \mathbf{0}

    tnu <- rep(0, n)

    # \tilde{\mathbf{\tau}} = \mathbf{0}

    ttau <- rep(0, n)

    # \mathbf{\mu} = \mathbf{0}

    mu <- rep(0, n)

    # \Sigma = \mathbf{K}  (only used in sequential EP)

    Sigma <- K



    # calculate marginal negative log likelihood at ttau = tnu = mu = 0s

    z <- mu / sqrt(1 + diag(K))

    mnll <- -sum(stats::pnorm(y * z), log.p = TRUE)



    # ~~~~~~~~~~~~~~~~~~~~~

    # set up for EP algorithm

    # set up damping factor (for parallel EP) as in GPStuff

    df <- 0.8



    # set old mnll to Inf & start iteration counter

    mnll_old <- Inf

    logM0_old <- logM0 <- rep(0, n)

    it <- 1

    converged <- FALSE



    while (!converged) {



      if (parallel) {



        # calculate key parameters

        dSigma <- diag(Sigma)

        tau <- 1 / dSigma - ttau

        nu <- 1 / dSigma * mn - tnu

        mu <- nu / tau

        sigma2 <- 1 / tau



        # get marginal moments of posterior

        lis <- ep.moments(y, sigma2, mu)



        # recalculate parameters from these moments

        # \delta\tilde{\tau} (change in \tilde{\tau})

        dttau <- 1 / lis$sigma2hat - tau - ttau



        # \tilde{\tau}

        ttau <- ttau + df * dttau



        # \delta\tilde{\nu} (change in \tilde{\nu})

        dtnu <- 1 / lis$sigma2hat * lis$muhat - nu - tnu



        # \tilde{\nu}

        tnu <- tnu + df * dtnu



      } else {

        # otherwise use sequential EP



        lis <- list(Sigma = Sigma,

                    ttau = ttau,

                    tnu = tnu,

                    mu = mu)



        # cycle through in random order

        for (i in sample(1:n)) {

          lis <- update.ep(i, y, mn, lis)

        } # end permuted for loop



        Sigma <- lis$Sigma

        ttau <- lis$ttau

        tnu <- lis$tnu

        mu <- lis$mu



        sigma2 <- diag(Sigma)



      }  # end parallel / sequential



      # recompute the approximate posterior parameters \Sigma and \mathbf{\mu}

      # using eq. 3.53 and eq. 3.68



      # sW = \tilde{S}^{\frac{1}{2}} = \sqrt{\mathbf{\tilde{\tau}}}

      sW <- sqrt(ttau)



      # L = cholesky(I_n  + \tilde{S}^{\frac{1}{2}} * K * \tilde{S}^{\frac{1}{2}})

      L <- chol(sW %*% t(sW) * K + eye)



      # V = L^T \\ \tilde{S}^{\frac{1}{2}} * K

      sWmat <- matrix(rep(sW, n), n) # byrow = TRUE?

      V <- backsolve(L, sWmat * K, transpose = TRUE)



      # \Sigma = \mathbf{K} - \mathbf{V}^T \mathbf{V}

      Sigma <- K - t(V) %*% V



      # \mathbf{\mu} = \Sigma \tilde{\mathbf{\nu}}

      mu <- Sigma %*% tnu  # + mn



      # calculate new mnll and assess convergence

      # compute logZ_{EP} using eq. 3.65, 3.73 and 3.74 and the existing L

      # \mathbf{\sigma}^2 = diag(\Sigma)

      sigma2 <- diag(Sigma)

      tau_n <- 1 / sigma2 - ttau

      nu_n <- mu / sigma2 - tnu + mn * tau_n



      z <- nu_n / tau_n / (sqrt(1 + 1 / tau_n))

      lZ <- stats::pnorm(y * z, log.p = TRUE)



      # split the final equation up into 5 bits...

      mnll.a <- sum(log(diag(L))) - sum(lZ) - t(tnu) %*% Sigma %*% tnu / 2

      mnll.b <- t(nu_n - mn * tau_n)

      mnll.c <- ((ttau / tau_n * (nu_n - mn * tau_n) - 2 * tnu) / (ttau + tau_n)) / 2

      mnll.d <- sum(tnu ^ 2 / (tau_n + ttau)) / 2

      mnll.e <- sum(log(1 + ttau / tau_n)) / 2



      mnll <- as.numeric(mnll.a - mnll.b %*% mnll.c + mnll.d - mnll.e)



      # improvement in negative log marginal likelihood

      dmnll <- abs(mnll - mnll_old)



      # improvement in log of the 0th moment

      dlogM0 <- max(abs(logM0 - logM0_old))



      # both under tolerance?

      sub_tol <- dmnll < tol & dlogM0 < tol



      if ((sub_tol & it >= itmin) | it >= itmax) {

        # stop if there was little improvement and there have been at least

        # itmin iterations or if there have been itmax or more

        # iterations

        converged <- TRUE

      } else {

        it <- it + 1

        mnll_old <- mnll

      }



    } # end while loop



    # throw an error if the iterations maxed out before convergence

    if (it >= itmax) {

      stop(paste0('maximum number of iterations (', itmax, ') reached

                  without convergence'))

    }



    # calculate posterior parameters

    sW <- sqrt(ttau)

    alpha = tnu - sW * backsolve(L, backsolve(L, sW * (K %*% tnu), transpose = TRUE))

    f = crossprod(K, alpha) + mn



    # return relevant parameters

    return (list(y = oldy,

                 x = x,

                 MAP = f,

                 ls = l,

                 a = alpha,

                 W = ttau,

                 L = L,

                 K = K,

                 e = e,

                 obsx = x,

                 obsy = oldy,

                 mnll = mnll,

                 wt = wt,

                 l_grads = NULL))

  }

#Cov.SE----
cov.SE <- function(x1, x2 = NULL, e1 = NULL, e2 = NULL, l) {

  n1 <- nrow(x1)

  n2 <- ifelse(is.null(x2), n1, nrow(x2))

  n3 <- ncol(x1)



  # distance matrices

  if(is.null(x2)) {

    e2 <- e1

    # if no second matrix do with distance matrices for speed up

    dists <- lapply(1:n3, function(i, x) dist(x[, i]) ^ 2, x1)

  } else {

    dists <- list()

    for (i in 1:n3) {

      dists[[i]] <-   x1[, i] ^ 2 %*% t(rep(1, n2)) +

        rep(1, n1) %*% t(x2[, i] ^ 2) - 2 * x1[, i] %*% t(x2[, i])

    }

  }



  # with error matrices

  if (!is.null(e1)) {

    E1 <- list()

    ones <- t(rep(1, n2))

    for (i in 1:n3) {

      E1[[i]] <- e1[, i] %*% ones

    }



    if (!is.null(e2)) {

      E2 <- list()

      ones <- t(rep(1, n1))

      for (i in 1:n3) {

        E2[[i]] <- t(e2[, i] %*% ones)

      }

    } else {

      E2 <- as.list(rep(0, n3))

    }



    # run through each covariate



    sumdiffs <- 0

    denom <- 1

    lower <- lower.tri(E1[[1]])

    for (i in 1:n3) {

      err <- E1[[i]] + E2[[i]]

      if (is.null(x2)) {

        err <- err[lower] # save only lower portion for speed up

      }

      sumdiffs <- sumdiffs + dists[[i]] / (err + l[i])

      denom <- denom * (1 + err / l[i])

    }

    # inverse kronecker delta

    ikds <- as.numeric(sumdiffs > 0)

    diag(ikds <- 1)

    denom <- sqrt(denom) * ikds

    K <- exp(-0.5 * sumdiffs) / denom



  } else {

    # without error matrices

    sumdiffs <- 0

    for (i in 1:n3) {

      sumdiffs <- sumdiffs + dists[[i]] / l[i]

    }

    K <- exp(-0.5 * sumdiffs)  # to matrix?

  }



  if(any(class(sumdiffs) %in% 'dist')) {

    K <- as.matrix(K)

    diag(K) <- 1

  }

  K

}

#d0----
d0 <-

  function(z, y) {

    if(length(y) != length(z)) y <- rep(y, length(z))

    pr <- y > 0 & y < 1

    npr <- !pr

    ans <- vector('numeric', length(y))

    y[pr] <- stats::qnorm(y[pr])

    y[npr] <- 2 * y[npr] - 1

    ans[pr] <- stats::dnorm(y[pr], z[pr], log = TRUE)

    ans[npr] <- stats::pnorm(y[npr] * z[npr], log.p = TRUE)

    ans

  }
#d1----

d1 <-

  function(z, y) {

    pr <- y > 0 & y < 1

    npr <- !pr

    ans <- vector('numeric', length(y))

    y[pr] <- stats::qnorm(y[pr])

    y[npr] <- 2 * y[npr] - 1

    ans[pr] <- y[pr] - z[pr]

    ans[npr] <- y[npr] * stats::dnorm(z[npr]) / stats::pnorm(y[npr] * z[npr])

    ans

  }

#d2----

d2 <-

  function(z, y) {

    pr <- y > 0 & y < 1

    npr <- !pr

    ans <- vector('numeric', length(y))

    y[npr] <- 2 * y[npr] - 1

    ans[pr] <- -1

    a <- stats::dnorm(z[npr]) ^ 2 / stats::pnorm(y[npr] * z[npr]) ^ 2

    b <- y[npr] * z[npr] * stats::dnorm(z[npr]) / stats::pnorm(y[npr] * z[npr])

    ans[npr] <- -a - b

    ans

  }
#d3----

d3 <- function(z, y) {

  pr <- y > 0 & y < 1

  npr <- !pr

  ans <- vector('numeric', length(y))

  y[npr] <- 2 * y[npr] - 1

  ans[pr] <- 0

  n <- stats::dnorm(z[npr])

  p <- stats::pnorm(y[npr] * z[npr])

  f <- z[npr]

  a <- n / p ^ 3

  b <- f * (y[npr] ^ 2 + 2) * n * p

  c <- y[npr] * (f ^ 2 - 1) * p ^ 2 + 2 * y[npr] * n ^ 2

  ans[npr] <- a * (b + c)

  ans

}

#psiline----
psiline <-

  function(s, adiff, a, K, y, d0, mn = 0, wt) {

    a <- a + s * as.vector(adiff)

    f <- K %*% a + mn

    psi(a, f, mn, y, d0, wt)

  }

#psi----


psi <-

  function(a, f, mn, y, d0, wt) {

    0.5 * t(a) %*% (f - mn) - sum(wt * d0(f, y))

  }
#cov.SE.d1----
cov.SE.d1 <- function (x, e = NULL, l) {

  # get gradients (matrices) of the kernel wrt. the parameters

  # CURRENTLY IGNORES e!!



  # number of parameters

  n <- length(l)



  # assign vector for gradients

  grads <- list()



  # get full covariance matrix

  K <- cov.SE(x1 = x, e1 = e, l = l)



  # loop through them

  for (i in 1:n) {



    # squared distances

    d2_i <- as.matrix(dist(x[, i]) ^ 2)



    # gradient for each parameter

    grads[[i]] <- K * (1 / l[i] ^ 2) * d2_i / 2



  }



  # return as a list

  return (grads)



}
#predict----
predict.graf.raster <-

  function (object, x, type, CI, maxn, ...) {



    # the following adapted from dismo:::predict



    variables <- colnames(object$x)



    if (is.null(CI)) {



      # if CIs aren't required

      out <- raster(x)



    } else {



      if (CI == 'std') {

        # if the SD is needed (latent case)

        out <- brick(x,

                     nl = 2)



      } else {

        # if proper CIs are required

        out <- brick(x,

                     nl = 3)



      }



    }



    ncols <- ncol(out)



    firstrow <- 1

    firstcol <- 1



    if (!canProcessInMemory(out, 3)) {



      filename <- rasterTmpFile()

      out <- writeStart(out, filename = filename, ...)

      inMemory <- FALSE



    } else {



      v <- array(NA, dim = c(nrow(out), ncol(out), nlayers(out)))

      inMemory <- TRUE



    }



    tr <- blockSize(out, n = nlayers(x) + 2)

    pb <- pbCreate(tr$n)



    for (i in 1:tr$n) {



      rr <- firstrow + tr$row[i] - 1



      rowvals <- getValuesBlock(x,

                                row = rr,

                                nrows = tr$nrows[i],

                                firstcol,

                                ncols)



      rowvals <- rowvals[, variables, drop = FALSE]



      res <- matrix(NA,

                    nrow = nrow(rowvals),

                    ncol = nlayers(out))



      rowvals <- na.omit(rowvals)



      if (length(rowvals) > 0) {



        newdata <- data.frame(rowvals)



        # loop through and convert factors to factors

        for (col in ncol(newdata)) {



          if (is.factor(object$obsx[, col])) {



            newdata[, col] <- factor(newdata[, col])



          }



        }



        # predict to this batch of pixels

        p <- predict.graf(object,

                          newdata = newdata,

                          type = type,

                          CI = CI,

                          maxn = maxn,

                          ...)



        naind <- as.vector(attr(rowvals, "na.action"))



        if (!is.null(naind)) {



          res[-naind, ] <- p



        } else {



          res <- p



        }



      }



      if (inMemory) {



        res <- array(res, dim = c(ncol(out), tr$nrows[i], nlayers(out)))

        res <- aperm(res, c(2, 1, 3))

        rows <- tr$row[i]:(tr$row[i] + tr$nrows[i] - 1)

        v[rows, , ] <- res



      } else {



        out <- writeValues(out, res, tr$row[i])



      }



      pbStep(pb, i)



    }



    pbClose(pb)



    if (inMemory) {



      # permute v so that it gets vectorized in the right direction

      v <- aperm(v, c(2, 1, 3))



      for (i in 1:nlayers(out)) {



        out <- setValues(out,

                         as.vector(v[, , i]),

                         layer = i)



      }



    } else {



      out <- writeStop(out)



    }



    names(out) <- colnames(p)



    return (out)



  }

predict.graf <-

  function(object, newdata = NULL, type = c('response', 'latent'),

           CI = 0.95, maxn = NULL, ...) {



    if (class(newdata) %in% c('Raster', 'RasterBrick', 'RasterStack')) {



      # predict to a raster

      ans <- predict.graf.raster(object = object,

                                 x = newdata,

                                 type = type,

                                 CI = CI,

                                 maxn = maxn,

                                 ...)



      return (ans)



    }



    type = match.arg(type)

    if (is.null(maxn)) maxn <- round(nrow(object$x) / 10)

    # set up data

    if (is.null(newdata)) {

      # use already set up inference data if not specified

      newdata <- object$x

      # get mean on raw data

      mn <- object$mnfun(object$obsx)



    } else {

      # convert any ints to numerics

      for(i in 1:ncol(newdata)) if (is.integer(newdata[, i])) newdata[, i] <- as.numeric(newdata[, i])



      if (is.data.frame(newdata) & all(sapply(object$obsx, class) == sapply(newdata, class))) {



        # get mean on raw data

        mn <- object$mnfun(newdata)



        k <- ncol(newdata)

        # numericize factors

        for (fac in object$facs) {

          newdata[, fac] <- as.numeric(newdata[, fac])

        }

        # convert to a matrix

        newdata <- as.matrix(newdata)

        # scale, if needed

        if (!is.null(object$scaling)) {

          notfacs <- (1:k)

          if(length(object$facs) > 0) notfacs <- notfacs[-object$facs]

          for(i in 1:length(notfacs)) {

            newdata[, notfacs[i]] <- (newdata[, notfacs[i]] - object$scaling[1, i]) / object$	scaling[2, i]

          }

        }

      } else {

        stop('newdata must be either a dataframe with the same elements as used for inference, or NULL')

      }

    }



    # check CI

    if (!is.null(CI)) {

      if (!(CI == 'std' & type == 'latent')) {

        if (CI >= 1 | CI <= 0) {

          stop("CI must be a number between 0 and 1, or NULL")

        }

        err <- stats::qnorm( 1 - (1 - CI) / 2 )

      }

    }

    # latent case

    if (type == 'latent') {



      if (is.null(CI)) {

        # if CIs aren't wanted

        ans <- pred(newdata, object, mn, std = FALSE, maxn = maxn)

        colnames(ans) <- "posterior mean"

      } else if (CI == 'std') { # if standard deviations are wanted instead

        ans <- pred(newdata, object, mn, std = TRUE, maxn = maxn)

        colnames(ans) <- c("posterior mean", "posterior std")

      } else {

        # if they are

        pred <- pred(newdata, object, mn, std = TRUE, maxn = maxn)

        upper <- pred[, 1] + err * pred[, 2]

        lower <- pred[, 1] - err * pred[, 2]

        ans <- cbind(pred[, 1], lower, upper)

        colnames(ans) <- c("posterior mean", paste("lower ", round(100 * CI), "% CI", sep = ""),

                           paste("upper ", round(100 * CI), "% CI", sep = ""))

      }

    } else {

      # response case

      if (is.null(CI)) {

        # if CIs aren't required

        ans <- stats::pnorm(pred(newdata, object, mn, std = FALSE, maxn = maxn))

        colnames(ans) <- "posterior mode"

      } else {

        # if CIs are required

        pred <- pred(newdata, object, mn, std = TRUE, maxn = maxn)

        upper <- pred[, 1] + err * pred[, 2]

        lower <- pred[, 1] - err * pred[, 2]

        ans <- stats::pnorm(cbind(pred[, 1], lower, upper))

        colnames(ans) <- c("posterior mode", paste("lower ", round(100 * CI), "% CI", sep = ""),

                           paste("upper ", round(100 * CI), "% CI", sep = ""))

      }

    }

    ans

  }

pred <-

  function(predx, fit, mn, std = TRUE, maxn = 250, same = FALSE) {

    predx <- as.matrix(predx)

    n <- nrow(predx)

    if (n > maxn & !same) {

      inds <- split(1:n, ceiling((1:n) / maxn))

      fun <- function(ind, X, fit, std, maxn) {

        pred(X[ind, , drop = FALSE], fit, mn[ind], std, maxn, same)

      }

      prediction <- lapply(inds, fun, predx, fit, std, maxn)

      prediction <- do.call('rbind', prediction)

    } else {

      if(same) {

        # if predicting back to input data re-use covariance matrix

        Kx <- fit$K

        prediction <- fit$MAP

      } else {

        Kx <- cov.SE(fit$x, predx, e1 = fit$e, l = fit$ls)

        mpred <- stats::qnorm(mn)

        prediction <- crossprod(Kx, fit$a) + mpred

      }



      if (std) {

        v <- backsolve(fit$L, sqrt(as.vector(fit$W)) * Kx, transpose = T)

        # using correlation matrix, so diag(kxx) is all 1s, no need to compute kxx

        predvar <- 1 - crossprod(v)

        prediction <- cbind(prediction, sqrt(diag(predvar)))

        colnames(prediction) <- c("MAP", "std")

      }

    }

    prediction

  }
